673205ed5fe93f907b2e3e7e77fd8f9d006b4bfb,java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java,SomaticMutationWalker,map,#RefMetaDataTracker#char#LocusContext#,177

Before Change


            //     at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must
            //     be at least 6.3;
            double tumorLod = t2.getAltVsRef(altAllele);
            if (t2.qualitySums.get(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD_THRESHOLD) {
                if (OUTPUT_FAILURES) {
                    System.out.println("FAILED " + context.getContig() + ":" + context.getPosition() + " due to MAX MM QSCORE TEST." +
                            " LOD was " + tumorReadPile.getAltVsRef(altAllele) +
                            " LOD is now " + t2.getAltVsRef(altAllele) +
                            " QSUM was " + tumorReadPile.qualitySums.get(altAllele) +
                            " QSUM is now " + t2.qualitySums.get(altAllele));
                }
                continue;
            }

            // TODO: using the original pile here since often these artifacts will be supported
            // by those reads that get thrown out!  Maybe that means we don't need the noise filter...
            boolean shouldDisalign =
                    disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);

            if (shouldDisalign) {
                if (OUTPUT_FAILURES) {
                    System.out.println("FAILED " + context.getContig() + ":" + context.getPosition() + " due to DISALIGNMENT TEST.");
                }
                continue;
            }






            // (ii) the quality score sum for the mutant base in the normal must be < 50 and the
            //      LOD score for ref:ref vs mutant:ref + mutant:mutant must be at least 2.3.
            double normalLod = normalReadPile.getRefVsAlt(altAllele);
            if ( normalReadPile.qualitySums.get(altAllele) > 50 || normalLod < NORMAL_LOD_THRESHOLD) {
                continue;
            }

            // if we're still here... we've got a somatic mutation!  Output the results
            // and stop looking for mutants!
            if (bedOutput) {
                out.println(
                context.getContig() + "\t" +
                        context.getPosition() + "\t" +
                        context.getPosition() + "\t" +
                        "TScore:" + tumorLod +
                        "__TRefSum: " + tumorReadPile.qualitySums.get(ref) +
                        "__TAltSum: " + tumorReadPile.qualitySums.get(altAllele) +

After Change



    }

    public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
        Map<Character, Integer> midp = new HashMap<Character, Integer>();
        midp.put('A', Integer.MAX_VALUE);
        midp.put('C', Integer.MAX_VALUE);
        midp.put('G', Integer.MAX_VALUE);
        midp.put('T', Integer.MAX_VALUE);

        // First, exclude DBSNP Sites
        for ( ReferenceOrderedDatum datum : tracker.getAllRods() )
        {
            if ( datum != null )
            {
                if ( datum instanceof rodDbSNP) {
                    // we're at a dbsnp site... move along... nothing to see here... 
                    return -1;
                }
            }
        }

        char upRef = Character.toUpperCase(ref);

        // TODO: should we be able to call mutations at bases where ref is N?
        if (upRef == 'N') { return -1; }
        
        List<SAMRecord> reads = context.getReads();

        LocusReadPile tumorReadPile = new LocusReadPile(ref);
        LocusReadPile normalReadPile = new LocusReadPile(ref);

        for ( int i = 0; i < reads.size(); i++ )
        {
            SAMRecord read = reads.get(i);

            if (read.getNotPrimaryAlignmentFlag() ||
                read.getDuplicateReadFlag() ||
                read.getReadUnmappedFlag() ||
                read.getMappingQuality() <= 0

               || (read.getReadPairedFlag() && (!read.getProperPairFlag() || read.getInferredInsertSize() >= MAX_INSERT_SIZE))
                    ) {
                continue;
            }

            String rg = (String) read.getAttribute("RG");
            String sample = read.getHeader().getReadGroup(rg).getSample();

            int offset = context.getOffsets().get(i);

            char base = read.getReadString().charAt(offset);
            if (base == 'N' || base == 'n') { continue; }


            // TODO: build a pile of reads and offsets, then pass that into a
            // constructor for the normalGL class
            // that way, we can build a different pile of reads later on and extract the genotype
            if (normalSampleName.equals(sample)) {
                normalReadPile.add(read, offset);

            } else if (tumorSampleName.equals(sample)) {
                tumorReadPile.add(read, offset);


                int midDist = Math.abs((int)(read.getReadLength() / 2) - offset);
                if (midDist < midp.get(base)) { midp.put(base, midDist); }

            } else {
                throw new RuntimeException("Unknown Sample Name: " + sample);
            }
        }

        // pretest: if the sum of the quality scores for all non-ref alleles < 60, just quit looking now
        if (tumorReadPile.qualitySums.getOtherQualities(ref) < MIN_MUTANT_SUM_PRETEST) {
            return -1;
        }


        // Test each of the poosible alternate alleles

        for (char altAllele : new char[]{'A','C','G','T'}) {
            if (altAllele == upRef) { continue; }


            // (i) either an adjusted quality score sum in the tumor for the mutant base must be
            //     at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must
            //     be at least 6.3;
            int mutantSum = tumorReadPile.qualitySums.get(altAllele);
            int refSum = tumorReadPile.qualitySums.get(ref);

            if (tumorReadPile.getAltVsRef(altAllele) >= TUMOR_LOD_THRESHOLD
//                    ||
//                (mutantSum >= MIN_MUTANT_SUM && (float)mutantSum / (float) refSum >= 0.05f)
                ) {
                // yep -- just fall through... easier to think about this way!
            } else {
                continue;
            }

            // make sure we've seen at least 1 obs of the alternate allele within 20bp of the read-middle
            boolean failedMidpointCheck = midp.get(altAllele) > 20;
//            if (failedMidpointCheck) {
//                out.println("Rejecting due to midpoint check!");
//                return 0;
//            }


            // do a MSA to figure out if the alternate reads comes from a cluster of reads with more
            // than one alternate allele (hints at being an alignment artifact)
            ReferenceSequence refSeq;
            // TODO: don't hardcode.  Make this the max read length in the pile 
            long refStart = context.getLocation().getStart() - 150;
            //tumorReadPile.offsets.get(0);
            long refStop = context.getLocation().getStart() + 150;
            try {
                IndexedFastaSequenceFile seqFile = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
                refSeq = seqFile.getSubsequenceAt(context.getContig(),refStart, refStop);

            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }

//            System.out.println("TESTING " + context.getContig() + ":" + context.getPosition()); 
            LocusReadPile t2 = filterHighMismatchScoreReads(tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);

            // TEST the LOD score again!
            // TODO: extract this since we'll do it multiple times...

            // (i) either an adjusted quality score sum in the tumor for the mutant base must be
            //     at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must
            //     be at least 6.3;
            double tumorLod = t2.getAltVsRef(altAllele);
            if (mode.equals("full") && t2.qualitySums.get(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD_THRESHOLD) {
                if (OUTPUT_FAILURES) {

                    String msg = "FAILED  due to MAX MM QSCORE TEST." +
                            " LOD was " + tumorReadPile.getAltVsRef(altAllele) +
                            " LOD is now " + t2.getAltVsRef(altAllele) +
                            " QSUM was " + tumorReadPile.qualitySums.get(altAllele) +
                            " QSUM is now " + t2.qualitySums.get(altAllele);

                    System.out.println(
                            context.getContig() + "\t" +
                                    context.getPosition() + "\t" +
                                    context.getPosition() + "\t"
                            + msg.replace(' ','_')
                            );
                }
                continue;
            }

            // TODO: using the original pile here since often these artifacts will be supported
            // by those reads that get thrown out!  Maybe that means we don't need the noise filter...
            boolean shouldDisalign =
                    disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);

            if (mode.equals("full") && shouldDisalign) {
                if (OUTPUT_FAILURES) {
                    String msg = "FAILED due to DISALIGNMENT TEST.";

                    System.out.println(
                            context.getContig() + "\t" +
                                    context.getPosition() + "\t" +
                                    context.getPosition() + "\t"
                            + msg.replace(' ','_')
                            );